Analysis of zebrafish pineal gland cell types. For original data see Shainer et al. 2019 (https://www.sciencedirect.com/science/article/pii/S0960982219305561, pineal sample 1).
library(dplyr)
library(Seurat)
library(Matrix)
library(ggplot2)
library(reticulate)
library(stringr)
library(ggpubr)
#load files sample 1
pineal_s1_cellr_101 <- readRDS("/zstorage/cellr_vs_kallisto/rds_files/D_rerio.GRCz11.101/dr_pineal_s1_cellr.rds")
pineal_s1_kb_101 <- readRDS("/zstorage/cellr_vs_kallisto/rds_files/D_rerio.GRCz11.101/dr_pineal_s1_kb.rds")
pineal_s1_kb_forced_101 <- readRDS("/zstorage/cellr_vs_kallisto/rds_files/D_rerio.GRCz11.101/dr_pineal_s1_kb_forced.rds")
Calculate the percentage of mitochondrial genes per cell.
pineal_s1_kb_101[["percent.mt"]] <- PercentageFeatureSet(object = pineal_s1_kb_101, pattern = "^mt-")
Visualize QC metrics.
VlnPlot(object = pineal_s1_kb_101,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
ncol = 3,
pt.size=0)
Total number of cells before filtration:
sum(table(...=pineal_s1_kb_101@active.ident))
## [1] 2675
Filtration of outlier cells containing unusual number of genes, UMI or percentage of mitochondrial genes. Plot the distribution of the filtered cells.
pineal_s1_kb_101 <- subset(x = pineal_s1_kb_101,
subset = nFeature_RNA > 200
& nCount_RNA < 15000
& percent.mt<30)
VlnPlot(object = pineal_s1_kb_101,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
ncol = 3,
pt.size=0)
Total number of cells after filtration:
sum(table(...=pineal_s1_kb_101@active.ident))
## [1] 2199
Standard normalization, variable gene identification and scaling:
pineal_s1_kb_101 <- NormalizeData(object = pineal_s1_kb_101,
normalization.method = "LogNormalize",
scale.factor = 10000)
pineal_s1_kb_101 <- FindVariableFeatures(object = pineal_s1_kb_101,
selection.method = "vst",
nfeatures = 2000)
all_genes_kallisto_s1 <- rownames(x = pineal_s1_kb_101)
pineal_s1_kb_101 <- ScaleData(object = pineal_s1_kb_101, features = all_genes_kallisto_s1)
principal component analysis.
pineal_s1_kb_101 <- RunPCA(object = pineal_s1_kb_101, features = VariableFeatures(object = pineal_s1_kb_101))
## PC_ 1
## Positive: selenop, krt8, actb2, atp1b1a, cd63, zgc:162730, actb1, s100a10b, junba, cxcl18b
## atp1a1b, ctsla, icn, si:ch73-335l21.4, CR318588.4, zfp36l1b, sparc, fabp7a, krt18a.1, npc2
## fosl1a, tagln2, btg2, sgk1, fxyd1, nfkbiaa, gstp1, glulb, fosb, angptl4
## Negative: rbp4l, tph2, pde6ha, rcvrn2, gngt1, pde6ga, exorh, saga, rcvrn3, arl3l1
## gnat1, rbp3, BX004774.2, nrgna, tulp1b, cetn4, arl3l2, tulp1a, pde6gb, ahcy
## asmt, si:dkey-220f10.4, cnga1a, ddc, unc119.2, gch1, cldn2, PLEKHB1, cngb3.2, ppdpfa
## PC_ 2
## Positive: clu, fabp7a, atp1a1b, hsp70l, vim, krt18a.1, stra6, bckdhbl, bag3, cdkn1a
## ubb, hspb8, sparc, rpe65a, slc38a3a, btg2, scg3, s100b, wfdc2, proca1
## cxcl18b, si:ch211-80h18.1, fabp7b, zgc:114181, rgrb, dkk3b, rbp1.1, slc13a1, si:ch211-81a5.8, cbx7a
## Negative: si:dkey-27i16.2, cx32.2, fcer1gl, ctss2.2, zgc:64051, ccl35.1, lgals3bpb, si:cabz01074946.1, cd74b, cd74a
## mhc2a, spi1b, stoml3b, havcr1, fermt3b, ccl34b.1, slc7a7, ccl35.2, ncf1, apoc1
## cxcr4b, BX649485.1, lgals9l1, ms4a17a.9, ctsc, cebpa, laptm5, grna, mpeg1.1, pfn1
## PC_ 3
## Positive: si:ch211-81a5.8, zgc:114181, ndrg1a, bckdhbl, slc13a1, dkk3b, fabp7b, gstm.3, si:dkey-11c5.11, si:ch211-80h18.1
## rbp1.1, si:dkey-12l12.1, vim, AL954359.2, tmem98, mstnb, zgc:153311, flr, zgc:158404, igfbp2a
## foxj1a, tmem72, tbata, bmp1b, mt2, mcl1a, ppdpfa, proca1, ackr3a, rbp2b
## Negative: si:dkey-33c12.3, snap25a, chga, bdnf, elavl3, vgf, elavl4, anxa13l, id4, gng13b
## stmn4l, pcp4a, si:dkeyp-69c1.7, ywhag2, rtn1b, uts2a, nell2b, nxph1, hpcal4, nrsn1
## cart3, p2rx2, atp6v1b2, stmn2a, sncga, tmsb2, insm1b, atp1a3a, pam, scg2b
## PC_ 4
## Positive: zgc:114181, dkk3b, slc13a1, fabp7b, igfbp2a, gstm.3, si:dkey-12l12.1, rbp1.1, mstnb, ahcyl1
## flr, rbp4, bckdhbl, si:dkey-11c5.11, b3glcta, AL954359.2, tmem98, zgc:153311, slc38a3a, si:ch211-80h18.1
## ndrg1a, ackr3a, tmem72, si:ch211-81a5.8, C7H20orf27, bmp1b, tbata, zgc:158404, vim, enkur
## Negative: rdh5, rgra, asip2b, rbp5, fxyd6l, si:ch211-251b21.1, rlbp1b, cxcl14, efhd1, syt5b
## scg3, jhy, ppp1r14aa, slc3a2a, slc6a2, slc1a3a, cldn7a, her15.2, tagln2, fdx1
## coch, prdx1, myl9b, lgals2a, bzw1b, c1qtnf5, rpe65a, cnn2, cdon, txn
## PC_ 5
## Positive: dcn, CABZ01092746.1, ccl25b, pmp22a, cldn11a, sost, fsta, twist1a, pcolcea, msx1b
## lxn, thbs4b, anxa1a, bhmt, si:ch1073-291c23.2, clec3ba, cygb1, tmem176, selenbp1, eif4ebp3
## srgn, igfbp5b, si:dkey-11f4.20, zgc:158343, fstb, si:ch73-86n18.1, crhbp, adh8a, cxcl12a, serpinf1
## Negative: ctsd, stra6, scg3, rpe65a, fabp7a, lgals2a, hspb8, rgrb, atp1b1a, nanos1
## efhd1, rgra, rdh5, rlbp1b, clu, si:dkey-112a7.4, actb1, si:ch211-251b21.1, jhy, stmn1b
## bag3, syt5b, rnaseka, keap1a, smox, fdx1, cdkn1a, itm2cb, asip2b, uchl1
Visualize the principal components percentage of variance by an elbow plot.
ElbowPlot(object = pineal_s1_kb_101, ndims = 30)
PCs 1-25 were used as dimensions of reduction to compute the k.param nearest neighbors
pineal_s1_kb_101 <- FindNeighbors(object = pineal_s1_kb_101, dims = 1:25)
pineal_s1_kb_101 <- FindClusters(object = pineal_s1_kb_101, resolution = 1.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2199
## Number of edges: 72904
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8244
## Number of communities: 20
## Elapsed time: 0 seconds
pineal_s1_kb_101 <- RunUMAP(object = pineal_s1_kb_101, dims = 1:25)
kb_UMAP_unmerged_s1 <- DimPlot(object = pineal_s1_kb_101, reduction = "umap",
label=TRUE, pt.size = 0.5, label.size = 3) +
theme(legend.position="none",
axis.title.x=element_text(size=12),
axis.title.y=element_text(size=12),
plot.title = element_text(size=14, hjust=0.0)) + ggtitle("kallisto (res.=1.5)") +
theme(plot.title = element_text(size = 12))
kb_UMAP_unmerged_s1
Analysis of the top markers for each cluster.
pineal_s1_kb_101.markers <- FindAllMarkers(object = pineal_s1_kb_101,
only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 0.8)
pineal_s1_kb_101.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
Dotplot of the top known markers of the pineal cell types (based on Shainer et al. 2019) as well as newly identify markers (such as col14a1b, dcn and ccr9a).
dot_plot_genes_s1= c("exorh", "gnat1", "gngt1",
"gnat2", "gngt2a", "col14a1b", "opn1lw1","parietopsin",
"asip2b", "rpe65a",
"dkk3b", "fabp7b",
"elavl4", "cart3",
"dcn", "igfbp5b",
"cahz", "hbaa1",
"ccr9a", "il4",
"cd74a", "apoc1",
"kdrl", "plvapa",
"bscl2l", "gch2",
"hbegfb", "fgl2a")
kallisto_dotplot_unmerged_s1<- DotPlot(pineal_s1_kb_101, features = dot_plot_genes_s1,
cluster.idents=FALSE, dot.scale=2) + RotatedAxis() +
theme(axis.text.x = element_text(angle=45, size=10),
axis.text.y = element_text(size=5, angle=0),
legend.title = element_text(size=10),
legend.text = element_text(size = 10),
axis.title.x=element_blank(),
axis.title.y=element_blank())
kallisto_dotplot_unmerged_s1
Markers separating the red- and green-like photoreceptors (clusters 15 & 17):
FeaturePlot(object=pineal_s1_kb_101, features = c("col14a1b", "parietopsin"), label = TRUE, label.size = 3)
How many cells in each cluster?
(table(...=pineal_s1_kb_101@active.ident))
## ...
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 339 224 186 180 166 160 150 144 141 90 89 84 49 48 37 31 22 22 19 18
Calculate the percentage of mitochondrial genes per cell.
pineal_s1_cellr_101[["percent.mt"]] <- PercentageFeatureSet(object = pineal_s1_cellr_101, pattern = "^mt-")
Visualize QC metrics.
VlnPlot(object = pineal_s1_cellr_101,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
ncol = 3,
pt.size=0)
Total number of cells before filtration:
sum(table(...=pineal_s1_cellr_101@active.ident))
## [1] 4982
Filtration of outlier cells containing unusual number of genes, UMI or percentage of mitochondrial genes. Plot the distribution of the filtered cells.
pineal_s1_cellr_101 <- subset(x = pineal_s1_cellr_101,
subset = nFeature_RNA > 200
& nCount_RNA < 15000
& percent.mt<30)
VlnPlot(object = pineal_s1_cellr_101,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
ncol = 3,
pt.size=0)
Total number of cells after filtration:
sum(table(...=pineal_s1_cellr_101@active.ident))
## [1] 4334
Standard normalization, variable gene identification and scaling:
pineal_s1_cellr_101 <- NormalizeData(object = pineal_s1_cellr_101,
normalization.method = "LogNormalize",
scale.factor = 10000)
pineal_s1_cellr_101 <- FindVariableFeatures(object = pineal_s1_cellr_101,
selection.method = "vst",
nfeatures = 2000)
all_genes_cellr_s1 <- rownames(x = pineal_s1_cellr_101)
pineal_s1_cellr_101 <- ScaleData(object = pineal_s1_cellr_101, features = all_genes_cellr_s1)
Principal component analysis.
pineal_s1_cellr_101 <- RunPCA(object = pineal_s1_cellr_101,
features = VariableFeatures(object = pineal_s1_cellr_101))
## PC_ 1
## Positive: rbp4l, pde6ha, exorh, gngt1, pde6ga, gnat1, saga, rbp3, pde6gb, tph2
## BX004774.2, arl3l1, nrgna, rcvrn2, tulp1a, cnga1a, tulp1b, rcvrn3, pde6a, ddc
## arl3l2, si:dkey-220f10.4, rcvrnb, asmt, aipl2, ppdpfa, cetn4, ahcy, guk1b, parapinopsinb
## Negative: selenop, krt8, zgc:162730, cd63, atp1b1a, s100a10b, si:ch73-335l21.4, junba, actb2, cxcl18b
## atp1a1b, icn, actb1, zfp36l1b, sparc, fxyd1, sgk1, angptl4, nfkbiaa, krt18a.1
## CR318588.4, jdp2b, btg2, fabp7a, tagln2, ccn1, ctsla, igfbp2a, fosb, serpinh1b
## PC_ 2
## Positive: si:dkey-27i16.2, fcer1gl, spi1b, ctss2.2, cd74a, cd74b, zgc:64051, lgals3bpb, ccl35.1, stoml3b
## havcr1, si:cabz01074946.1, mhc2a, cx32.2, slc7a7, cxcr4b, ncf1, pfn1, ms4a17a.9, ncf2
## arpc1b, BX649485.1, ccl34b.1, ccl35.2, ctsc, laptm5, mpeg1.1, lgals9l1, apoc1, fermt3b
## Negative: clu, atp1a1b, fabp7a, bckdhbl, vim, stra6, bag3, hspb8, krt18a.1, hsp70l
## si:ch211-81a5.8, cdkn1a, slc38a3a, hspa8b, cbx7a, scg3, wfdc2, si:ch211-80h18.1, proca1, ubb
## zgc:114181, slc13a1, dkk3b, mstnb, AL954359.2, s100b, fabp7b, lpl, cxcl18b, btg2
## PC_ 3
## Positive: si:dkey-33c12.3, chga, bdnf, elavl4, snap25a, vgf, elavl3, p2rx2, vav3b, anxa13l
## id4, stmn2a, nxph1, nell2b, tcf7l2, gng13b, hpcal4, syt1a, atp6v1b2, rtn1b
## ywhag2, nrsn1, scg2b, stmn4l, si:dkeyp-69c1.7, insm1b, atp1a3a, cart3, pam, map1b
## Negative: ppdpfa, si:ch211-81a5.8, pde6gb, rbp4l, pde6ha, BX004774.2, ahcy, gngt1, zgc:114181, rbp3
## bckdhbl, tulp1b, ndrg1a, dkk3b, nrgna, unc119.2, gnat1, slc13a1, exorh, pde6ga
## cnga1a, arl3l2, tulp1a, saga, pla1a, fabp7b, si:dkey-11c5.11, cetn4, pde6a, arl3l1
## PC_ 4
## Positive: hbaa1, hbba1.1, hbba1, hbba2, si:ch211-5k11.8, ccl25b, hbaa2, dcn, CABZ01092746.1, pmp22a
## sost, crhbp, cldn11a, fsta, si:dkey-11f4.20, pcolcea, cxcl12a, lxn, cahz, thbs4b
## twist1a, igfbp5b, bhmt, si:ch211-103n10.5, si:ch211-250g4.3, si:ch1073-291c23.2, msx1b, anxa1a, tmem176, eif4ebp3
## Negative: ctsd, fkbp4, scg3, stoml3b, atp6v0ca, vmp1, uchl1, hspa8b, cct4, cx32.2
## havcr1, lgals3bpb, CABZ01020840.1, spi1b, cbx7a, rdh5, slc7a7, ctss2.2, bag3, gnb1b
## rnaseka, sqstm1, ctsc, hsp70l, atp1b1a, si:dkey-112a7.4, tspan36, stra6, sb:cb1058, fcer1gl
## PC_ 5
## Positive: asip2b, rdh5, rgra, fxyd6l, si:ch211-251b21.1, rbp5, efhd1, cxcl14, rlbp1b, syt5b
## jhy, slc1a3a, her15.2, slc6a2, c1qtnf5, cldn7a, tagln2, slc3a2a, coch, ppp1r14aa
## scg3, cdo1, marcksl1b, prdx1, fdx1, cnn2, bzw1b, myl9b, cdon, lgals2a
## Negative: dkk3b, zgc:114181, slc13a1, fabp7b, pla1a, gstm.3, rbp1.1, bckdhbl, si:dkey-12l12.1, AL954359.2
## si:ch211-80h18.1, zgc:153311, mstnb, tmem98, si:dkey-11c5.11, cyp19a1b, slc38a3a, tmem72, tbata, zgc:158404
## slc13a3, b3glcta, vim, bmp1b, igfbp2a, rbp2b, ackr3a, C7H20orf27, ndrg1a, foxj1a
Visualize the principal components percentage of variance by an elbow plot.
ElbowPlot(object = pineal_s1_cellr_101, ndims = 30)
PCs 1-25 were used as dimensions of reduction to compute the k.param nearest neighbors
pineal_s1_cellr_101 <- FindNeighbors(object = pineal_s1_cellr_101, dims = 1:25)
pineal_s1_cellr_101 <- FindClusters(object = pineal_s1_cellr_101, resolution = 1.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4334
## Number of edges: 159944
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8560
## Number of communities: 23
## Elapsed time: 0 seconds
pineal_s1_cellr_101 <- RunUMAP(object = pineal_s1_cellr_101, dims = 1:25)
cellr_UMAP_unmerged_s1 <- DimPlot(object = pineal_s1_cellr_101, reduction = "umap",
label=TRUE, pt.size = 0.5, label.size = 3) +
theme(legend.position="none",
axis.title.x=element_text(size=12),
axis.title.y=element_text(size=12),
plot.title = element_text(size=12, hjust=0.0)) + ggtitle("Cell Ranger (res.=1.5)")+
theme(plot.title = element_text(size = 12))
cellr_UMAP_unmerged_s1
Analysis of the top markers for each cluster.
pineal_s1_cellr_101.markers <- FindAllMarkers(object = pineal_s1_cellr_101,
only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 0.8)
pineal_s1_cellr_101.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
Dotplot of the top known markers of the pineal cell types (based on Shainer et al. 2019) as well as newly identify markers (such as dcn and ccr9a).
cellranger_dotplot_unmerged_s1<- DotPlot(pineal_s1_cellr_101, features = dot_plot_genes_s1,
cluster.idents=FALSE, dot.scale=2) + RotatedAxis() +
theme(axis.text.x = element_text(angle=45, size=10),
axis.text.y = element_text(size=5, angle=0),
legend.title = element_text(size=10),
legend.text = element_text(size = 10),
axis.title.x=element_blank(),
axis.title.y=element_blank())
## Warning in FetchData(object = object, vars = features, cells = cells): The
## following requested variables were not found: col14a1b
cellranger_dotplot_unmerged_s1
Markers for red- and green-like photoreceptors are expressed in a single cluster (#14):
FeaturePlot(object=pineal_s1_cellr_101, features = c("opn1lw1", "parietopsin"), label = TRUE, label.size = 3)
How many cells in each cluster?
(table(...=pineal_s1_cellr_101@active.ident))
## ...
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 485 421 412 403 364 364 262 253 240 174 159 154 127 112 80 62 60 51 46 32
## 20 21 22
## 31 24 18
When using the same parameters, the two types of cone-like photoreceptors can be distinguished when the pre-processing is done with kallisto, but not Cell Ranger, even though Kallisto pre-processed data contain half of the total cells (2199 cells in kallisto, and 4334 cells in Cell Ranger), and 2/3 of the cone-like photoreceptors (53 in kallisto and 80 in Cell Ranger).
Increasing the resolution does not improve the identification of the two cone-like photoreceptors types in the Cell Ranger data (the green and red opsin expressing cells are still considered the same cluster) :
pineal_s1_cellr_101 <- FindNeighbors(object = pineal_s1_cellr_101, dims = 1:25)
pineal_s1_cellr_101 <- FindClusters(object = pineal_s1_cellr_101, resolution = 3.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4334
## Number of edges: 159944
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7604
## Number of communities: 38
## Elapsed time: 0 seconds
pineal_s1_cellr_101 <- RunUMAP(object = pineal_s1_cellr_101, dims = 1:25)
cellr_UMAP_unmerged_s1_res_3_5 <- DimPlot(object = pineal_s1_cellr_101, reduction = "umap",
label=TRUE, pt.size = 0.5, label.size = 3) +
theme(legend.position="none",
axis.title.x=element_text(size=10),
axis.title.y=element_text(size=10),
plot.title = element_text(size=12, hjust=0.0)) + ggtitle("Cell Ranger (res.=3.5)")
cellr_UMAP_unmerged_s1_res_3_5
FeaturePlot(object=pineal_s1_cellr_101, features = c("opn1lw1", "parietopsin"), label = TRUE, label.size = 3)
Dotplot high res.
cellranger_dotplot_high_res_s1<- DotPlot(pineal_s1_cellr_101, features = dot_plot_genes_s1,
cluster.idents=FALSE, dot.scale=2) + RotatedAxis() +
theme(axis.text.x = element_text(angle=45, size=10),
axis.text.y = element_text(size=5, angle=0),
legend.title = element_text(size=10),
legend.text = element_text(size = 10),
axis.title.x=element_blank(),
axis.title.y=element_blank())
## Warning in FetchData(object = object, vars = features, cells = cells): The
## following requested variables were not found: col14a1b
cellranger_dotplot_high_res_s1
This shows that similar to the results observed for the pineal sample 2, the additional type of the photoreceptors can only be detected under standard conditions in the kallisto pre-processed data, but not for the Cell Ranger pre-processed data.
For sample number 1, additional differences were observed between the Cell Ranger and the kallisto pre-processed data. Two Cell Ranger clusters (#15 & #21, when analyzed with resolution of 1.5) did not express any of the described pineal markers (see dotplot). By exploring the markers of those cluster, we identified cluster 21 to be pigment cells (expressing bscl2l & gch2 genes, which are known xanthophore markers) and cluster 15 to be a type of hematopoietic cells (expressing hbegfb & fgl2a).
FeaturePlot(object=pineal_s1_cellr_101, features = c("bscl2l", "gch2", "hbegfb", "fgl2a"), label = TRUE, label.size = 3)
Although these are not true pineal specific cell type, but rather present a contamination of “outside cells” such as the pigment cells (originating from the skin), or hematopoietic cells that reside in any tissue, these clusters seem to be unique to the Cell Ranger dataset and do not appear in the kallisto data set. Are those cells appear in kallisto and assigned to another cluster or are they filtered out? To test this, we checked whether the cell barcodes of the cells belonging to cluster 15 and 21 in the Cell Ranger data exist in the kallisto data, and if so in which cluster:
# list all of kallisto cell barcodes
kb_cell_barcodes<-data.frame(WhichCells(pineal_s1_kb_101))
names(kb_cell_barcodes)[1]="barcodes"
# list Cell Ranger cluster #21 cell barcodes
cellr_21_barcodes<-data.frame(WhichCells(pineal_s1_cellr_101, idents = "21"))
names(cellr_21_barcodes)[1]="barcodes"
cellr_21_barcodes$barcodes<-str_remove(cellr_21_barcodes$barcodes, "[-1]")
cellr_21_barcodes$barcodes<-str_remove(cellr_21_barcodes$barcodes, "[1]")
#plot Cell Ranger cluster #21 cell barcodes that exist in kallisto (kallisto UMAP)
cluster_21_in_kb<-DimPlot(pineal_s1_kb_101,
label=TRUE,
cells.highlight = c(intersect(cellr_21_barcodes, kb_cell_barcodes)),
label.size = 3) +
ggtitle("Cell Ranger cluster #21 cell barcodes in kallisto") +
theme(plot.title = element_text(size = 12, face="plain"))
# list Cell Ranger cluster #15 cell barcode
cellr_15_barcodes<-data.frame(WhichCells(pineal_s1_cellr_101, idents = "15"))
names(cellr_15_barcodes)[1]="barcodes"
cellr_15_barcodes$barcodes<-str_remove(cellr_15_barcodes$barcodes, "[-1]")
cellr_15_barcodes$barcodes<-str_remove(cellr_15_barcodes$barcodes, "[1]")
#plot Cell Ranger cluster #15 cell barcodes that exist in kallisto (kallisto UMAP)
cluster_15_in_kb<-DimPlot(pineal_s1_kb_101,
label=TRUE,
cells.highlight = c(intersect(cellr_15_barcodes, kb_cell_barcodes)),
label.size = 3) +
ggtitle("Cell Ranger cluster #15 cell barcodes in kallisto") +
theme(plot.title = element_text(size = 12, face="plain"))
unique_barcodes_plot<-ggarrange(cluster_21_in_kb, cluster_15_in_kb,
common.legend = TRUE,
labels = c("A", "B"),
ncol = 2, nrow = 1, legend = "right")
unique_barcodes_plot
Cell Ranger cluster #15 has 62 cells and cluster #21 has 24 cells, the number of the same cells that appear in kallisto:
# The number of Cell Ranger cluster #15 cell barcodes that exist in kallisto:
nrow(intersect(cellr_15_barcodes, kb_cell_barcodes))
## [1] 1
# The number of Cell Ranger cluster #21 cell barcodes that exist in kallisto:
nrow(intersect(cellr_21_barcodes, kb_cell_barcodes))
## [1] 16
Cluster #15 cells are almost completely filtered out in the kallisto dataset. These cells might be of low quality or are just lost. Cluster #21 cells mostly exist in the kallisto (but in a lower number), but do not form a unique cluster under the conditions set for the clustering of the data (UMAP shows that this cluster can be assigned manually or by other clustering parameters, similar to the case of the green-like photoreceptors described in the manuscript), or as a result of the lower number of cells of this type. This shows that some clusters can be observed in the Cell Ranger pre-processed data that do not appear in the kallisto data. In this particular case, these clusters represent an “uninteresting” information, as those are not true pineal gland cell types, but for other datasets it might be of a biological relevance.
Calculate the percentage of mitochondrial genes per cell.
pineal_s1_kb_forced_101[["percent.mt"]] <- PercentageFeatureSet(object = pineal_s1_kb_forced_101, pattern = "^mt-")
Visualize QC metrics.
VlnPlot(object = pineal_s1_kb_forced_101,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
ncol = 3,
pt.size=0)
Total number of cells before filtration:
sum(table(...=pineal_s1_kb_forced_101@active.ident))
## [1] 4980
Filteration of outlier cells containing unusual number of genes, UMI or percentage of mitochondrial genes. Plot the distribution of the filtered cells.
pineal_s1_kb_forced_101 <- subset(x = pineal_s1_kb_forced_101,
subset = nFeature_RNA > 200
& nCount_RNA < 15000
& percent.mt<30)
VlnPlot(object = pineal_s1_kb_forced_101,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
ncol = 3,
pt.size=0)
Total number of cells after filtration:
sum(table(...=pineal_s1_kb_forced_101@active.ident))
## [1] 4347
Standard normalization, variable gene identification and scaling:
pineal_s1_kb_forced_101 <- NormalizeData(object = pineal_s1_kb_forced_101,
normalization.method = "LogNormalize",
scale.factor = 10000)
pineal_s1_kb_forced_101 <- FindVariableFeatures(object = pineal_s1_kb_forced_101,
selection.method = "vst",
nfeatures = 2000)
all_genes_kallisto_forced_s1 <- rownames(x = pineal_s1_kb_forced_101)
pineal_s1_kb_forced_101 <- ScaleData(object = pineal_s1_kb_forced_101, features = all_genes_kallisto_forced_s1)
Principal component analysis.
pineal_s1_kb_forced_101 <- RunPCA(object = pineal_s1_kb_forced_101, features = VariableFeatures(object = pineal_s1_kb_forced_101))
## PC_ 1
## Positive: selenop, krt8, zgc:162730, cd63, atp1b1a, junba, cxcl18b, s100a10b, si:ch73-335l21.4, atp1a1b
## actb2, actb1, icn, zfp36l1b, krt18a.1, sparc, jdp2b, angptl4, fabp7a, fxyd1
## tagln2, fosb, nfkbiaa, btg2, sgk1, CR318588.4, fosl1a, ctsla, ccn1, serpinh1b
## Negative: rbp4l, pde6ha, gngt1, exorh, pde6ga, BX004774.2, pde6gb, gnat1, saga, tph2
## rbp3, nrgna, cnga1a, rcvrn2, rcvrn3, rcvrnb, arl3l1, tulp1a, asmt, si:dkey-220f10.4
## arl3l2, aipl2, ddc, cngb3.2, ppdpfa, cldn2, ahcy, CABZ01073265.1, parapinopsinb, si:ch211-245j22.3
## PC_ 2
## Positive: cx32.2, si:dkey-27i16.2, fcer1gl, cd74b, ctss2.2, cd74a, zgc:64051, ccl35.1, mhc2a, lgals3bpb
## si:cabz01074946.1, stoml3b, spi1b, havcr1, cxcr4b, slc7a7, ccl35.2, ccl34b.1, pfn1, ncf1
## fermt3b, BX649485.1, ms4a17a.9, apoc1, arpc1b, cebpa, laptm5, lgals9l1, si:ch211-102c2.4, ctsc
## Negative: clu, atp1a1b, fabp7a, hspa8b, bckdhbl, stra6, hsp70l, scg3, cbx7a, si:ch211-81a5.8
## vim, hspb8, krt18a.1, uchl1, cdkn1a, si:ch211-80h18.1, slc38a3a, slc13a1, wfdc2, s100b
## dkk3b, proca1, si:dkey-11c5.11, mdkb, rgrb, zgc:114181, fabp7b, AL954359.2, rbp1.1, rdh5
## PC_ 3
## Positive: rtn1b, elavl4, ywhag2, scg3, sncga, scg2b, syt1a, dusp5, si:dkey-33c12.3, snap25a
## bdnf, ctsd, cbx7a, stmn4l, slc35g2a, vgf, chga, map1b, atp1a3a, stmn2a
## id4, syngr3b, atp6v1b2, anxa13l, elavl3, si:ch73-119p20.1, uchl1, pcp4a, gng13b, gng3
## Negative: hbaa1, hbba1.1, hbba1, si:ch211-5k11.8, hbba2, hbaa2, si:ch211-103n10.5, rbp4, ccl25b, CABZ01092746.1
## dcn, pmp22a, sost, igfbp5b, cldn11a, anxa1a, fsta, pcolcea, igfbp2a, lxn
## bhmt, twist1a, thbs4b, msx1b, si:ch1073-291c23.2, clec3ba, eif4ebp3, serpinf1, vmo1a, si:dkey-11f4.20
## PC_ 4
## Positive: crhbp, si:dkey-33c12.3, chga, bdnf, elavl4, snap25a, vgf, elavl3, cxcl12a, anxa13l
## id4, si:dkeyp-69c1.7, uts2a, stmn2a, stmn4l, nxph1, nell2b, gng13b, p2rx2, hpcal4
## atp6v1b2, cart3, rtn1b, insm1b, nrsn1, atp1a3a, slc17a6b, syt1a, scg2b, pcp4a
## Negative: si:ch211-81a5.8, ndrg1a, ppdpfa, bckdhbl, si:dkey-11c5.11, slc13a1, zgc:114181, cx32.2, si:ch211-80h18.1, ahcy
## stoml3b, dkk3b, pde6gb, fabp7b, cetn4, vim, rbp1.1, unc119.2, lgals3bpb, AL954359.2
## arl3l2, gstm.3, ctss2.2, rbp3, nrgna, havcr1, spi1b, tulp1a, tmem98, UTP14C
## PC_ 5
## Positive: slc13a1, dkk3b, zgc:114181, fabp7b, gstm.3, rbp1.1, bckdhbl, si:dkey-12l12.1, si:ch211-80h18.1, mstnb
## zgc:153311, tmem98, b3glcta, AL954359.2, si:dkey-11c5.11, flr, tmem72, slc38a3a, ahcyl1, zgc:158404
## bmp1b, tbata, vim, rbp2b, foxj1a, cyp19a1b, ackr3a, enkur, fgfbp1b.1, mctp2b
## Negative: rdh5, asip2b, rgra, fxyd6l, rbp5, si:ch211-251b21.1, cxcl14, efhd1, rlbp1b, syt5b
## c1qtnf5, jhy, slc6a2, tagln2, slc3a2a, ppp1r14aa, slc1a3a, coch, her15.2, cldn7a
## scg3, fdx1, prdx1, cdon, myl9b, bzw1b, cnn2, slc1a2b, txn, marcksl1b
Visualize the principal components percentage of variance by an elbow plot.
ElbowPlot(object = pineal_s1_kb_forced_101, ndims = 30)
PCs 1-25 were used as dimensions of reduction to compute the k.param nearest neighbors
pineal_s1_kb_forced_101 <- FindNeighbors(object = pineal_s1_kb_forced_101, dims = 1:25)
pineal_s1_kb_forced_101 <- FindClusters(object = pineal_s1_kb_forced_101, resolution = 1.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4347
## Number of edges: 168165
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8483
## Number of communities: 23
## Elapsed time: 0 seconds
pineal_s1_kb_forced_101 <- RunUMAP(object = pineal_s1_kb_forced_101, dims = 1:25)
kb_forced_UMAP_unmerged_s1_res_1_5 <- DimPlot(object = pineal_s1_kb_forced_101, reduction = "umap",
label=TRUE, pt.size = 0.5, label.size = 3) +
theme(legend.position="none",
axis.title.x=element_text(size=12),
axis.title.y=element_text(size=12),
plot.title = element_text(size=12, hjust=0.0)) + ggtitle("kallisto forced (res.=1.5)")
kb_forced_UMAP_unmerged_s1_res_1_5
Analysis of the top markers for each cluster.
pineal_s1_kb_forced_101.markers <- FindAllMarkers(object = pineal_s1_kb_forced_101,
only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 0.8)
pineal_s1_kb_forced_101.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
Dotplot of the top known markers of the pineal cell types (based on Shainer et al. 2019) as well as newly identify markers (such as dcn and ccr9a).
kallisto_forced_dotplot_unmerged_s1<- DotPlot(pineal_s1_kb_forced_101, features = dot_plot_genes_s1,
cluster.idents=FALSE, dot.scale=2) + RotatedAxis() +
theme(axis.text.x = element_text(angle=45, size=10),
axis.text.y = element_text(size=5, angle=0),
legend.title = element_text(size=10),
legend.text = element_text(size = 10),
axis.title.x=element_blank(),
axis.title.y=element_blank())
kallisto_forced_dotplot_unmerged_s1
FeaturePlot(object=pineal_s1_kb_forced_101, features = c("opn1lw1", "parietopsin"), label = TRUE, label.size = 3)
Under a resolution of 1.5, the green- and red-like photoreceptors cannot be separated. Increasing the resolution in the case of kallisto forced enable to seprate those photoreceptors.
pineal_s1_kb_forced_101 <- FindNeighbors(object = pineal_s1_kb_forced_101, dims = 1:25)
pineal_s1_kb_forced_101 <- FindClusters(object = pineal_s1_kb_forced_101, resolution = 2.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4347
## Number of edges: 168165
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7926
## Number of communities: 30
## Elapsed time: 0 seconds
pineal_s1_kb_forced_101 <- RunUMAP(object = pineal_s1_kb_forced_101, dims = 1:25)
kb_forced_UMAP_unmerged_s1_res_2_4 <- DimPlot(object = pineal_s1_kb_forced_101, reduction = "umap",
label=TRUE, pt.size = 0.5, label.size = 3) +
theme(legend.position="none",
axis.title.x=element_text(size=12),
axis.title.y=element_text(size=12),
plot.title = element_text(size=14, hjust=0.0)) + ggtitle("kallisto forced (res.=2.4)") +
theme(plot.title = element_text(size = 12))
kb_forced_UMAP_unmerged_s1_res_2_4
Dotplot high res
kallisto_forced_dotplot_hig_res_s1<- DotPlot(pineal_s1_kb_forced_101, features = dot_plot_genes_s1,
cluster.idents=FALSE, dot.scale=2) + RotatedAxis() +
theme(axis.text.x = element_text(angle=45, size=10),
axis.text.y = element_text(size=5, angle=0),
legend.title = element_text(size=10),
legend.text = element_text(size = 10),
axis.title.x=element_blank(),
axis.title.y=element_blank())
kallisto_forced_dotplot_hig_res_s1
umap_S1<- ggarrange(cellr_UMAP_unmerged_s1,
cellr_UMAP_unmerged_s1_res_3_5,
kb_UMAP_unmerged_s1,
kb_forced_UMAP_unmerged_s1_res_1_5,
kb_forced_UMAP_unmerged_s1_res_2_4,
labels = c("A", "B", "C", "D","E"),
common.legend = FALSE,
ncol = 1, nrow = 5) #legend = "right")
dotplots_s1<- ggarrange(cellranger_dotplot_unmerged_s1,
cellranger_dotplot_high_res_s1,
kallisto_dotplot_unmerged_s1,
kallisto_forced_dotplot_unmerged_s1,
kallisto_forced_dotplot_hig_res_s1,
common.legend = TRUE,
ncol = 1, nrow = 5, legend = "right")
ggarrange(umap_S1, dotplots_s1,
ncol = 2, nrow = 1, widths = c(1, 2))